package com.hbctcf.news.international.processor;

import com.hbctcf.news.international.utils.Constants;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for wallstreetcn.com.
 *
 * <p>Each page is handled according to its URL:
 * <ul>
 *   <li>the URL equal to {@code Constants.WALLSTREET_TARGET_URL}: links found under the
 *       home news items are queued as target requests and the page itself is skipped;</li>
 *   <li>a URL matching the premium-article regex: {@code title} and {@code content}
 *       fields are extracted with the premium-page XPaths;</li>
 *   <li>a URL matching the regular-article regex: {@code title} and {@code content}
 *       fields are extracted with the article-page XPaths;</li>
 *   <li>anything else: the URL is printed and the page is skipped.</li>
 * </ul>
 */
public class WallstreetcnProcessor implements PageProcessor {

	/** Regex applied to the page URL to recognise a regular article page. */
	private static final String ARTICLE_URL_REGEX = "https://wallstreetcn\\.com/articles/\\d+";

	/** Regex applied to the page URL to recognise a premium article page. */
	private static final String PREMIUM_URL_REGEX = "https://wallstreetcn\\.com/premium/articles/\\d+";

	/** XPath selecting the cover anchors of the news items on the seed page. */
	private static final String HOME_LINKS_XPATH =
			"//div[@class='home-main-placeholder']/div[@class='home-main']/div[@class='home-articles']/div[@class='wscn-tabs']/div[@class='wscn-tabs__content']/div[@class='wscn-tab-pane']/div/div[@class='home-news-item']/a[@class='home-news-item__cover']";

	/** XPath for the title text of a premium article page. */
	private static final String PREMIUM_TITLE_XPATH =
			"//main/div[@class='premium-article-wrapper']/div[@class='premium-article']/div[@class='premium-article__heading']/div[@class='premium-article__heading__cover']/div[@class='cover-topic-title']/text()";

	/** XPath for the tidied body text of a premium article page. */
	private static final String PREMIUM_CONTENT_XPATH =
			"//main/div[@class='premium-article-wrapper']/div[@class='premium-article']/div[@class='pa-main']/div[@class='pa-main__centre large']/div[@class='pa-main__content preview']/tidyText()";

	/** XPath for the title text of a regular article page. */
	private static final String ARTICLE_TITLE_XPATH =
			"//main/div[@class='article-wrapper']/div[@class='article main-article']/div[@class='article__heading']/div[@class='article__heading__title']/text()";

	/** XPath for the tidied body text of a regular article page. */
	private static final String ARTICLE_CONTENT_XPATH =
			"//main/div[@class='article-wrapper']/div[@class='article main-article']/div[@class='article__content']/div[@class='node-article-content']/tidyText()";

	/** Crawl settings handed to WebMagic: retry times set to 3, time-out set to 10000. */
	private final Site site = Site.me().setRetryTimes(3).setTimeOut(10000);

	/**
	 * Dispatches on the page URL: queues article links from the seed page, extracts
	 * {@code title}/{@code content} from premium and regular article pages, and skips
	 * everything else.
	 *
	 * @param page the downloaded page supplied by WebMagic
	 */
	@Override
	public void process(Page page) {
		System.out.println("<--->"+page.getUrl());

		// Seed page: only harvest links, emit no result for the page itself.
		if (Constants.WALLSTREET_TARGET_URL.equals(page.getUrl().toString())) {
			page.addTargetRequests(page.getHtml().xpath(HOME_LINKS_XPATH).links().all());
			page.setSkip(true);
			return;
		}

		// Premium articles are tested before regular ones, as in the original branch order.
		if (page.getUrl().regex(PREMIUM_URL_REGEX).match()) {
			storeArticle(page, PREMIUM_TITLE_XPATH, PREMIUM_CONTENT_XPATH);
			return;
		}

		if (page.getUrl().regex(ARTICLE_URL_REGEX).match()) {
			storeArticle(page, ARTICLE_TITLE_XPATH, ARTICLE_CONTENT_XPATH);
			return;
		}

		// Unrecognised URL: report it and produce no result.
		System.out.println("<===>"+page.getUrl());
		page.setSkip(true);
	}

	/**
	 * Returns the crawl settings used by this processor.
	 *
	 * @return the {@link Site} instance created when this processor was constructed
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Evaluates the two XPaths against the page HTML and stores the results under the
	 * {@code title} and {@code content} fields, title first.
	 */
	private static void storeArticle(Page page, String titleXpath, String contentXpath) {
		page.putField("title", selectAsString(page, titleXpath));
		page.putField("content", selectAsString(page, contentXpath));
	}

	/** Evaluates {@code xpath} against the page HTML and returns the selection's string form. */
	private static String selectAsString(Page page, String xpath) {
		return page.getHtml().xpath(xpath).toString();
	}

}
